all <- read_csv(here("data","sensors", "sensor-data_all.csv")) %>%
clean_names() %>%
mutate(date_time=mdy_hms(date_time), #aapply lubridate to date/time column
date=format(date_time, '%m/%d/%Y'), #create only date column
time=format(date_time, '%H:%M:%S')) %>% #create only time column
select(site, sensor_number, date_time, date, time, temp_c, p_h) %>%
mutate(site=replace(site, site=="LOL", "Lompoc Landing"),
site=replace(site, site=="ALG", "Alegria"),
site=replace(site, site=="BML", "Bodega Bay")) #rename locations
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Site = col_character(),
## `Sensor number` = col_double(),
## `Download date` = col_character(),
## `Calibration date` = col_character(),
## `Temp_(C)` = col_double(),
## `Voltage#1` = col_double(),
## TK = col_double(),
## `S(T)` = col_double(),
## `Eo(T)` = col_double(),
## pH = col_double(),
## `Date time` = col_character()
## )
#set site order for plotting (legend)
all$site <- factor(all$site, levels=c("Alegria", "Lompoc Landing", "Bodega Bay"))
all_filter <- all %>%
filter(p_h < 8.5) %>%
mutate(removeit = ifelse(site=="Alegria" & date_time<ymd_hms("2021-06-30 23:00:00"), "remove", "keep")) %>%
filter(removeit=="keep")
ggplot(all_filter, aes(x=date_time, y=p_h, group=site)) +
geom_line(aes(color=site), size=0.7) +
geom_point(aes(color=site), size=0.5) +
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
ylab("pH") +
theme_bw() +
theme(legend.title=element_blank(),
axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
axis.title.x=element_text(size=15),
axis.text.y=element_text(size=12),
axis.title.y=element_text(size=15),
legend.text=element_text(siz=12))
ggsave(here("figures", "sensors", "june-sept.png"), height=20, width=40, units="cm")
ggplot(all, aes(x=date_time, y=temp_c, group=site)) +
geom_line(aes(color=site), size=0.7) +
geom_point(aes(color=site), size=0.5) +
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
ggsave(here("figures", "sensors","june-sept_temp.png"), height=20, width=40, units="cm")
bml <- all %>%
filter(site=="Bodega Bay")
ggplot(bml, aes(x=date_time)) +
geom_line(aes(y=p_h), color="red") +
geom_line(aes(y=temp_c), color="blue") + # Divide by 10 to get the same range than the temperature
scale_y_continuous(name = "pH", #first axis name
sec.axis = sec_axis(~., name="Temp (C)")) + #second axis name and features
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
## Try it with pH and temp sorted as "groups"
bml2 <- bml %>%
pivot_longer(cols=temp_c:p_h,
names_to = "group",
values_to = "value")
ggplot(bml2, aes(x=date_time, y=value, group=group)) +
geom_line(aes(color=group), size=0.7) +
#geom_point(aes(color=group), size=0.5) +
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
Let’s come back to this…
Sites:
Glen: number of days to extract
Interval:
bml_tide <- read_html("http://tide.arthroinfo.org/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=150;units=feet;year=2021;month=05;day=31;hour=09;min=30;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Bodega%20Harbor%20entrance%2C%20California") %>%
html_elements("pre") %>% #select only the date, time, and tide values from the webpage
html_text2() %>% #convert list to data table
data.frame() %>% #convert table to data frame
mutate(date_tide = str_split(., pattern = "\n")) %>% #split into rows by each time point
unnest(date_tide) %>% #unnest into two columns
mutate(date_tide=as.factor(date_tide)) %>% #make column values factors
separate(date_tide, into = c("date", "space", "time", "time_zone", "tide"), sep="\\s") %>% #separate the values (separated by spaces) into their own columns
select(-"space", -".") %>% #remove the "space" (blank space) column and duplicated column created by unnest()
unite("time", "time", "time_zone", sep="\ ",) %>% #join together time and time zone
unite("date_time", "date", "time", sep="\ ") %>% #join together date and time/time zone
drop_na() %>% #remove final row with NA (not sure why that's even there)
mutate(date_time=ymd_hm(date_time), #apply lubridate to date/time column
tide=as.numeric(tide)) #coerce tide values from character to numeric
ggplot(bml_tide, aes(x=date_time, y=tide)) +
geom_line(size=0.7) +
scale_x_datetime(breaks = scales::date_breaks("2 days"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
ylab("Tide height") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
ggsave(here("figures", "sensors", "bml_tide.png"), height=20, width=40, units="cm")
bml_all <- full_join(bml, bml_tide) %>%
drop_na(c(tide, p_h)) %>% #whoops, started off collecting data every 10 minutes and then switched to 15 minutes (so account for that by removing pH and tide values that don't overlap)
distinct(date_time, .keep_all = TRUE) #remove duplicated data (not sure where those came from)
## Joining, by = "date_time"
bml3 <- bml_all %>%
pivot_longer(cols=temp_c:tide,
names_to = "data",
values_to = "value")
ggplot(bml3, aes(x=date_time, y=value, group=data)) +
geom_line(aes(color=data), size=0.7) +
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
ggsave(here("figures", "sensors", "bml_pH_temp_tide.png"), height=20, width=40, units="cm")
bml_detide <- bml_all %>%
filter(tide>-0.5) %>%
drop_na(c(p_h)) #drop observations that don't overlap (10 min vs 15 min sampling interval)
ggplot(bml_detide, aes(x=date_time)) +
geom_line(aes(y=tide), color="red") +
geom_line(aes(y=temp_c), color="blue") + # Divide by 10 to get the same range than the temperature
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
bml_superfilter <- bml_all %>%
filter(date_time > ymd_hms("2021-07-19 23:00:00"),
date_time < ymd_hms("2021-07-26 23:00:00")) #7-30 to 8-06 is also interesting
#Looking at the mid July dates, looks like I might need to filter out tides below -0.2? But then if I look at other dates, it's not as bad. -0.4 might actually be a good decision.
y1 <- bml_superfilter$p_h
y2 <- bml_superfilter$temp_c
y3 <- bml_superfilter$tide
x <- bml_superfilter$date_time
highchart() %>%
hc_add_series(data = y3, dashStyle="solid") %>%
hc_add_series(data = y2, yAxis = 1) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor="#009E73", title=list(text="Tide cycle")),
list(lineWidth = 3, lineColor="#0072B2", title=list(text="Temperature"))) %>%
hc_xAxis(title = "Date", categories = x, breaks=10) %>%
hc_colors(c("#009E73","#0072B2"))
bml_detide_temp <- bml_detide %>%
filter(!(date_time >= ymd_hms("2021-07-10 06:00:00") & date_time <= ymd_hms("2021-07-10 07:30:00")),
!(date_time >= ymd_hms("2021-07-21 06:15:00") & date_time <= ymd_hms("2021-07-21 07:15:00")),
!(date_time >= ymd_hms("2021-07-22 06:15:00") & date_time <= ymd_hms("2021-07-22 08:00:00")),
!(date_time >= ymd_hms("2021-07-23 07:15:00") & date_time <= ymd_hms("2021-07-23 08:30:00")))
ggplot(bml_detide_temp, aes(x=date_time)) +
geom_line(aes(y=temp_c), color="red") +
geom_line(aes(y=tide), color="blue") + # Divide by 10 to get the same range than the temperature
scale_x_datetime(breaks = scales::date_breaks("1 day"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
# bml_detide_temp_ph <- bml_detide_temp %>%
# arrange(date_time) %>% #make sure observations are in order by date/time
# mutate(diff3 = p_h - lag(p_h, default = first(p_h))) %>% #find difference between two subsequent pH measurements to identify anomalies
# filter(diff3>-0.5,
# diff3<0.5) #remove weird readings on 7/10
#
# ggplot(bml_detide_temp_ph, aes(x=date_time)) +
# geom_line(aes(y=p_h), color="#009E73") +
# geom_line(aes(y=temp_c), color="#D55E00") + # Divide by 10 to get the same range than the temperature
# geom_line(aes(y=tide), color="#0072B2") + # Divide by 10 to get the same range than the temperature
# scale_x_datetime(breaks = scales::date_breaks("1 week"),
# labels = date_format("%m/%d %H:%m")) +
# annotate(geom="text", x=as.POSIXct("2021-05-31 00:10:00"), y=14, hjust=0, label="Temp (C)", color="#D55E00", size=5) +
# annotate(geom="text", x=as.POSIXct("2021-08-16 00:10:00"), y=9, hjust=-0.1, label="pH", color="#009E73", size=5) +
# annotate(geom="text", x=as.POSIXct("2021-05-31 00:10:00"), y=6, hjust=0, label="Tide", color="#0072B2", size=5) +
# xlab("Date & time") +
# ylab("Value") +
# theme_bw() +
# theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
# axis.title.x=element_text(size=15),
# axis.text.y=element_text(size=12),
# axis.title.y=element_text(size=15))
#
# ggsave(here("figures", "sensors", "bml_detide_tide-temp-pH.png"), height=20, width=40, units="cm")
lol_tide <- read_html("http:///tide.arthroinfo.org/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=150;units=feet;year=2021;month=06;day=14;hour=08;min=00;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Point%20Arguello%2C%20California") %>%
html_elements("pre") %>% #select only the date, time, and tide values from the webpage
html_text2() %>% #convert list to data table
data.frame() %>% #convert table to data frame
mutate(date_tide = str_split(., pattern = "\n")) %>% #split into rows by each time point
unnest(date_tide) %>% #unnest into two columns
mutate(date_tide=as.factor(date_tide)) %>% #make column values factors
separate(date_tide, into = c("date", "space", "time", "time_zone", "tide"), sep="\\s") %>% #separate the values (separated by spaces) into their own columns
select(-"space", -".") %>% #remove the "space" (blank space) column and duplicated column created by unnest()
unite("time", "time", "time_zone", sep="\ ",) %>% #join together time and time zone
unite("date_time", "date", "time", sep="\ ") %>% #join together date and time/time zone
drop_na() %>% #remove final row with NA (not sure why that's even there)
mutate(date_time=ymd_hm(date_time), #apply lubridate to date/time column
tide=as.numeric(tide)) #coerce tide values from character to numeric
ggplot(lol_tide, aes(x=date_time, y=tide)) +
geom_line(size=0.7) +
scale_x_datetime(breaks = scales::date_breaks("2 days"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
ylab("Tide height") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
ggsave(here("figures", "sensors", "lol_tide.png"), height=20, width=40, units="cm")
lol_temp <- read_csv(here("data","sensors", "hobo_lol.csv")) %>%
filter(row_number()!=1) %>% # remove the first row
clean_names() %>%
rename(timestamp=x2,
temp_hobo_f=x3) %>%
mutate(date_time=ymd_hms(timestamp), #aapply lubridate to date/time column
date=format(date_time, '%m/%d/%Y'), #create only date column
time=format(date_time, '%H:%M:%S'), #create only time column
temp_hobo_c=((as.numeric(temp_hobo_f)-32)*(5/9))) %>% #convert F to C from HOBO temp data
select(temp_hobo_c,date_time) #select the only columns that really matter
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Plot Title: LOL` = col_character(),
## X2 = col_character(),
## X3 = col_character()
## )
# filter all data for LOL
lol <- all %>%
filter(site=="Lompoc Landing") %>%
distinct(date_time, .keep_all = TRUE) #remove duplicated data (not sure where those came from)
#join HOBO data with Durafet data
lol_all_1 <- full_join(lol, lol_temp) %>%
drop_na(c(temp_c, p_h)) %>% #remove hobo measurements taken outside of Durafet measurements
rename(temp_durafet_c=temp_c)
## Joining, by = "date_time"
#Check difference between Durafet and HOBO temperature measurements
lol_temp_diff <- lol_all_1 %>%
mutate(diff=temp_durafet_c-temp_hobo_c)
#Plot it up to check for anomalies
y1 <- lol_temp_diff$temp_durafet_c
y2 <- lol_temp_diff$temp_hobo_c
y3 <- lol_temp_diff$diff
x <- lol_temp_diff$date_time
highchart() %>%
hc_add_series(data = y1, dashStyle="solid") %>%
hc_add_series(data = y2, yAxis = 1) %>%
hc_add_series(data = y3, yAxis = 2) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#D55E00', title=list(text="durafet")),
list(lineWidth = 3, lineColor="#009E73", title=list(text="hobo")),
list(lineWidth = 3, lineColor="#0072B2", title=list(text="diff"))) %>%
hc_xAxis(title = "Date", categories = x, breaks=10) %>%
hc_colors(c("#D55E00","#009E73", "#0072B2"))
#Join pH, temp data with tide data
lol_all <- full_join(lol_all_1, lol_tide) %>%
drop_na(c(tide, p_h)) %>% #remove unnecessary tide values
rename(temp_c=temp_hobo_c)
## Joining, by = "date_time"
#Check if anomalous points in durafet vs hobo are due to tide cycle?
lol_check <- lol_all %>%
mutate(diff=temp_durafet_c-temp_c) %>%
filter(tide<3.232) #this value was extracted thanks to code later on
#filter((date_time > ymd_hms("2021-10-01 01:00:00") & date_time <= ymd_hms("2021-10-22 24:00:00")))
#create a highchart to assess
y1 <- lol_check$temp_durafet_c
y2 <- lol_check$temp_c
y3 <- lol_check$diff
x <- lol_check$date_time
highchart() %>%
hc_add_series(data = y1, dashStyle="solid") %>%
hc_add_series(data = y3, yAxis = 1) %>%
hc_add_series(data = y3, yAxis = 2) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#D55E00', title=list(text="durafet")),
list(lineWidth = 3, lineColor="#009E73", title=list(text="hobo")),
list(lineWidth = 3, lineColor="#0072B2", title=list(text="diff"))) %>%
hc_xAxis(title = "Date", categories = x, breaks=10) %>%
hc_colors(c("#D55E00","#009E73", "#0072B2"))
# Anomalous points aside, plot it "all" up
lol_plot <- lol_all %>%
pivot_longer(cols=temp_durafet_c:tide,
names_to = "data",
values_to = "value")
ggplot(lol_plot, aes(x=date_time, y=value, group=data)) +
geom_line(aes(color=data), size=0.7) +
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
ggsave(here("figures", "sensors", "lol_pH_temp_tide.png"), height=20, width=40, units="cm")
lol_detide <- lol_all %>%
filter(tide>0.5)
ggplot(lol_detide, aes(x=date_time)) +
geom_line(aes(y=tide), color="red") +
geom_line(aes(y=temp_c), color="blue") +
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
lol_check <- lol_detide %>%
filter(date_time < ymd_hms("2021-06-30 23:00:00")) %>%
arrange(date_time) %>% #make sure observations are in order by date/time
mutate(diff = temp_c - lag(temp_c, default = first(temp_c))) %>% #find difference between two subsequent temp measurements to identify anomalies
mutate(diff2 = temp_c - lead(temp_c, default = first(temp_c))) #find difference between two subsequent temp measurements to identify anomalies
#filter(diff < 1 | diff > -1 | diff2 < 1 | diff2 > -1)
#Thank you stas g (https://stats.stackexchange.com/a/164830) for this function
find_peaks <- function (x, m = 3){
shape <- diff(sign(diff(x, na.pad = FALSE)))
pks <- sapply(which(shape < 0), FUN = function(i){
z <- i - m + 1
z <- ifelse(z > 0, z, 1)
w <- i + m + 1
w <- ifelse(w < length(x), w, length(x))
if(all(x[c(z : i, (i + 2) : w)] <= x[i + 1])) return(i + 1) else return(numeric(0))
})
pks <- unlist(pks)
pks
}
pk <- find_peaks(lol_check$temp_c, m = 50) #set a high threshold bc we need it...
lol_check_peak <- lol_detide %>%
filter(date_time < ymd_hms("2021-06-30 23:00:00")) %>%
arrange(date_time) %>% #make sure observations are in order by date/time
mutate(diff = temp_c - lag(temp_c, default = first(temp_c))) %>% #find difference between two subsequent temp measurements to identify anomalies
mutate(diff2 = temp_c - lead(temp_c, default = first(temp_c))) %>% #find difference between two subsequent temp measurements to identify anomalies
#filter(diff < 1 | diff > -1 | diff2 < 1 | diff2 > -1)
mutate(peak = ifelse(row_number() %in% c(pk) == TRUE, 1, 0)) #if a row ID matches the row ID found by find_peaks, then call it "1" (if not, "0")
ggplot(lol_check_peak, aes(x=date_time)) +
geom_line(aes(y=tide), color="red") +
geom_line(aes(y=temp_c), color="blue") +
#geom_line(aes(y=diff), color="green") +
#geom_line(aes(y=diff2), color="orange") +
geom_point(aes(y=temp_c, color=ifelse(peak>0, "red", "black"), size=ifelse(peak>0, 2, 0))) + #if it's a peak, color it red and make it big
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90)) +
scale_color_identity() +
scale_size_identity() +
geom_vline(aes(xintercept = date_time), lol_check_peak %>% filter(peak == 1)) + #if it's a peak, draw a vertical line
scale_y_continuous(breaks = round(seq(min(lol_check_peak$tide), max(lol_check_peak$tide), by = 0.5),1))
Looks like anything below a 1.5 tide height is definitely disconnected from the ocean
lol_detide_temp_ph <- lol_all %>%
filter(tide>1.5) %>%
arrange(date_time) %>% #make sure observations are in order by date/time
mutate(diff3 = p_h - lag(p_h, default = first(p_h))) #doesn't look like pH values jump extraordinarily
ggplot(lol_detide_temp_ph, aes(x=date_time)) +
geom_line(aes(y=p_h), color="blue") +
geom_line(aes(y=diff3), color="green") +
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
#And plot it up
ggplot(lol_detide_temp_ph, aes(x=date_time)) +
geom_line(aes(y=p_h), color="#009E73") +
geom_line(aes(y=temp_c), color="#D55E00") + # Divide by 10 to get the same range than the temperature
geom_line(aes(y=tide), color="#0072B2") + # Divide by 10 to get the same range than the temperature
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
annotate(geom="text", x=as.POSIXct("2021-09-24 00:01:00"), y=19, hjust=0, label="Temp (C)", color="#D55E00", size=5) +
annotate(geom="text", x=as.POSIXct("2021-6-14 00:01:00"), y=9, hjust=-0.1, label="pH", color="#009E73", size=5) +
annotate(geom="text", x=as.POSIXct("2021-6-14 00:01:00"), y=6, hjust=0, label="Tide", color="#0072B2", size=5) +
xlab("Date & time") +
ylab("Value") +
theme_bw() +
theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
axis.title.x=element_text(size=15),
axis.text.y=element_text(size=12),
axis.title.y=element_text(size=15))
ggsave(here("figures", "sensors", "lol_detide_tide-temp.png"), height=20, width=40, units="cm")
y1 <- lol_detide_temp_ph$p_h
y2 <- lol_detide_temp_ph$temp_c
y3 <- lol_detide_temp_ph$tide
x <- lol_detide_temp_ph$date_time
plot <- highchart() %>%
hc_add_series(data = y1, dashStyle="solid") %>%
hc_add_series(data = y2, yAxis = 1) %>%
hc_add_series(data = y3, yAxis = 2) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#D55E00', title=list(text="pH")),
list(lineWidth = 3, lineColor="#009E73", title=list(text="Temperature")),
list(lineWidth = 3, lineColor="#0072B2", title=list(text="Tide cycle"))) %>%
hc_xAxis(title = "Date", categories = x, breaks=10) %>%
hc_colors(c("#D55E00","#009E73", "#0072B2"))
htmlwidgets::saveWidget(widget = plot, file = here("figures", "sensors", "LOL_highchart.html"))
webshot::webshot(url = here("figures", "sensors", "LOL_highchart.html"), file = here("figures", "sensors", "LOL_highchart.png"), delay=3)
Well that was a bit of a waste of time
#look at July NOAA NDBC data from Santa Maria buoy
lol_wave <- read_csv(here("data","sensors", "wave-july2021.csv")) %>%
clean_names() %>%
filter(wvht<99.00) %>%
mutate(mn=mn+5) %>%
unite("date", "year", "month", "day", sep="/") %>%
unite("time", "hr", "mn", sep=":") %>%
unite("date_time", "date", "time", sep=" ") %>%
mutate(date_time=ymd_hm(date_time)) #apply lubridate to date and time
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Year = col_double(),
## Month = col_double(),
## Day = col_double(),
## hr = col_double(),
## mn = col_double(),
## WSPD = col_double(),
## WVHT = col_double(),
## WTMP = col_double()
## )
#join this with the july sensor df
lol_july <- lol_all %>%
filter(date_time < ymd_hms("2021-08-01 00:00:00"),
date_time > ymd_hms("2021-07-01 00:00:00"))
lol_wave_join <- full_join(lol_july, lol_wave) %>%
fill(wvht, .direction = "updown") %>% #fill in empty wave measurements with previous values before
distinct(date_time, .keep_all = TRUE) %>% #remove duplicated data (not sure where those came from)
filter(date_time > ymd_hms("2021-07-20 00:00:00"),
date_time < ymd_hms("2021-07-25 00:00:00"))
## Joining, by = "date_time"
y1 <- lol_wave_join$temp_c
y2 <- lol_wave_join$wvht
y3 <- lol_wave_join$tide
x <- lol_wave_join$date_time
highchart() %>%
hc_add_series(data = y1, dashStyle="solid") %>%
hc_add_series(data = y2, yAxis = 1) %>%
hc_add_series(data = y3, yAxis = 1) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#D55E00', title=list(text="Temp")),
list(lineWidth = 3, lineColor="#009E73", title=list(text="Wave Height")),
list(lineWidth = 3, lineColor="#0072B2", title=list(text="Tide cycle"))) %>%
hc_xAxis(title = "Date", categories = x, breaks=10) %>%
hc_colors(c("#D55E00",
"#009E73",
"#0072B2"))
#List of tide height candidates (at these heights, temperature started dropping again):
#2.321
#2.128
#2.120
#1.987
#2.277
#2.587
#2.529
#2.291
#2.315
#2.163
#2.54
#2.72
#2.47
#2.57
#2.43
#2.56
#2.46
#2.68
#2.68
The wave data isn’t a magic bullet, but it does look like I might be overprocessing the data if I make the tide height as high as 3.0. Based on the July data (especially keeping in mind temperature changes will be most apparent in the middle of the day when the sun is out in full force), I’m going to remove any data below a tide height of 2.0
lol_superfilter <- lol_all %>%
#filter(tide>2.3) %>%
filter(date_time > ymd_hms("2021-07-01 01:00:00"),
date_time < ymd_hms("2021-07-10 23:00:00"))
#3.232 looks good, 3.29 is also a solid candidate for tide filtering
#2.3, 2.68 are also good candidates - I want to make sure I'm not over-filtering
y1 <- lol_superfilter$temp_c
y2 <- lol_superfilter$temp_c
y3 <- lol_superfilter$tide
x <- lol_superfilter$date_time
highchart() %>%
hc_add_series(data = y2, dashStyle="solid") %>%
#hc_add_series(data = y2, yAxis = 1) %>%
hc_add_series(data = y3, yAxis = 1) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#D55E00', title=list(text="pH")),
list(lineWidth = 3, lineColor="#009E73", title=list(text="Temp")),
list(lineWidth = 3, lineColor="#0072B2", title=list(text="Tide cycle"))) %>%
hc_xAxis(title = "Date", categories = x, breaks=10) %>%
hc_colors(c("#D55E00",
"#009E73",
"#0072B2"))
Nevermind it wasn’t a waste of time! Highcharts are super useful! Looks like the pool is definitely disconnected from the ocean at a tide lower than a +3.0…maybe a +3.02 (but possibly even +3.2 or +3.4)… but to be safe, let’s stick with +2.0, I don’t want to overfilter the data.
lol_detided <- lol_detide_temp_ph %>%
filter(tide>3.232) %>%
filter(!(date_time >= ymd_hms("2021-07-31 13:30:00") & date_time <= ymd_hms("2021-07-31 14:00:00")),
(date_time != ymd_hms("2021-09-15 09:15:00")),
!(date_time >= ymd_hms("2021-09-16 01:30:00") & date_time <= ymd_hms("2021-09-16 02:45:00")), #anomalous points here identified by comparing durafet and hobo temp logger values
!(date_time >= ymd_hms("2021-08-01 13:45:00") & date_time < ymd_hms("2021-08-01 15:00:00"))) %>%
drop_na() #remove times with tides but without pH values
alg_tide <- read_html("http://tide.arthroinfo.org/tideshow.cgi?tplotdir=horiz;gx=640;gy=240;caltype=ndp;type=mrare;interval=00%3A15;glen=98;units=feet;year=2021;month=06;day=12;hour=01;min=00;tzone=local;d_year=;d_month=01;d_day=01;d_hour=00;d_min=00;ampm24=24;site=Gaviota%2C%20California") %>%
html_elements("pre") %>% #select only the date, time, and tide values from the webpage
html_text2() %>% #convert list to data table
data.frame() %>% #convert table to data frame
mutate(date_tide = str_split(., pattern = "\n")) %>% #split into rows by each time point
unnest(date_tide) %>% #unnest into two columns
mutate(date_tide=as.factor(date_tide)) %>% #make column values factors
separate(date_tide, into = c("date", "space", "time", "time_zone", "tide"), sep="\\s") %>% #separate the values (separated by spaces) into their own columns
select(-"space", -".") %>% #remove the "space" (blank space) column and duplicated column created by unnest()
unite("time", "time", "time_zone", sep="\ ",) %>% #join together time and time zone
unite("date_time", "date", "time", sep="\ ") %>% #join together date and time/time zone
drop_na() %>% #remove final row with NA (not sure why that's even there)
mutate(date_time=ymd_hm(date_time), #apply lubridate to date/time column
tide=as.numeric(tide)) #coerce tide values from character to numeric
ggplot(alg_tide, aes(x=date_time, y=tide)) +
geom_line(size=0.7) +
scale_x_datetime(breaks = scales::date_breaks("2 days"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
ylab("Tide height") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
ggsave(here("figures", "sensors", "alg_tide.png"), height=20, width=40, units="cm")
# filter all data for ALG
alg <- all %>%
filter(site=="Alegria")
alg_all <- full_join(alg, alg_tide)
## Joining, by = "date_time"
alg_plot <- alg_all %>%
pivot_longer(cols=temp_c:tide,
names_to = "data",
values_to = "value")
ggplot(alg_plot, aes(x=date_time, y=value, group=data)) +
geom_line(aes(color=data), size=0.7) +
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
ggsave(here("figures", "sensors", "alg_pH_temp_tide.png"), height=20, width=40, units="cm")
alg_desand <- alg_all %>%
filter(date_time > ymd_hms("2021-06-18 01:00:00")) %>% #super weird, but pH looks like it stabilizes at 1AM post sand
drop_na() %>% #remove rows with only tide values
distinct(date_time, .keep_all = TRUE) #remove duplicated data (not sure where those came from)
ggplot(alg_desand, aes(x=date_time, y=p_h)) +
geom_line(size=0.7) +
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
theme_bw() +
theme(axis.text.x=element_text(angle=90))
alg_highchart <- alg_desand %>%
filter(date_time >= ymd_hms("2021-07-17 01:00:00") & date_time <= ymd_hms("2021-07-27 01:00:00")) %>%
filter(tide>0.9)
y1 <- alg_highchart$p_h
y2 <- alg_highchart$temp_c
y3 <- alg_highchart$tide
x <- alg_highchart$date_time
highchart() %>%
hc_add_series(data = y1, dashStyle="solid") %>%
hc_add_series(data = y3, yAxis = 1) %>%
#hc_add_series(data = y3, yAxis = 2) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#D55E00', title=list(text="pH")),
#list(lineWidth = 3, lineColor="#009E73", title=list(text="Temp")),
list(lineWidth = 3, lineColor="#0072B2", title=list(text="Tide cycle"))) %>%
hc_xAxis(title = "Date", categories = x) %>%
hc_colors(c("#D55E00",
"#009E73",
"#0072B2"))
These points look fine (and filtering should be easy since the sensor is always connected to the ocean) - but the high chart is suggesting a pattern around 0.00 tide (so let’s try removing everything below 0.00… actually, let’s try 0.6 since some of those pH values are quite low and consistently dropping at low tide it seems)
Tried 0.00, but still seeing steep drop in pH with tide cycle Tried 0.60, but still seeing steep drop in pH Tried 0.80, but still seeing some drop in pH 0.90 appears to be OK at not creating massive drop in pH but still having some reasonable cycle
alg_detide <- alg_desand %>%
filter(tide>0.9)
ggplot(alg_detide, aes(x=date_time)) +
geom_line(aes(y=p_h), color="#009E73") +
geom_line(aes(y=temp_c), color="#D55E00") + # Divide by 10 to get the same range than the temperature
geom_line(aes(y=tide), color="#0072B2") + # Divide by 10 to get the same range than the temperature
scale_x_datetime(breaks = scales::date_breaks("4 days"),
labels = date_format("%m/%d %H:%m")) +
annotate(geom="text", x=as.POSIXct("2021-6-18 00:01:00"), y=19, hjust=0, label="Temp (C)", color="#D55E00", size=5) +
annotate(geom="text", x=as.POSIXct("2021-6-18 00:01:00"), y=10, hjust=-0.1, label="pH", color="#009E73", size=5) +
annotate(geom="text", x=as.POSIXct("2021-6-18 00:01:00"), y=-1.5, hjust=0, label="Tide", color="#0072B2", size=5) +
xlab("Date & time") +
ylab("Value") +
theme_bw() +
theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
axis.title.x=element_text(size=15),
axis.text.y=element_text(size=12),
axis.title.y=element_text(size=15))
ggsave(here("figures", "sensors", "alg_detide.png"), height=20, width=40, units="cm")
There’s still some noise at a few dates, let’s filter those dates out of the data ### Remove (or smooth?) time periods with a LOT of noise
alg_no_noise <- alg_detide %>%
filter(!(date_time > ymd_hms("2021-07-17 11:00:00") & date_time < ymd_hms("2021-07-17 19:30:00"))) %>% #this is a chunk of time with a lot of noise
filter(!(date_time %in% (ymd_hms("2021-06-19 10:15:00", #remove time points with significant jumps in the wrong direction
"2021-06-29 14:15:00",
"2021-06-29 15:30:00",
"2021-06-30 11:45:00",
"2021-06-30 14:15:00",
"2021-06-30 16:15:00",
"2021-06-30 19:15:00",
"2021-07-02 06:15:00",
"2021-07-02 07:45:00",
"2021-07-02 10:45:00",
"2021-07-04 09:45:00",
"2021-07-04 12:00:00",
"2021-07-04 15:15:00"))))
y1 <- alg_no_noise$p_h
y3 <- alg_no_noise$tide
x <- alg_no_noise$date_time
highchart() %>%
hc_add_series(data = y1, dashStyle="solid") %>%
hc_add_series(data = y3, yAxis = 1) %>%
hc_yAxis_multiples(
list(lineWidth = 3, lineColor='#D55E00', title=list(text="pH")),
list(lineWidth = 3, lineColor="#0072B2", title=list(text="Tide cycle"))) %>%
hc_xAxis(title = "Date", categories = x) %>%
hc_colors(c("#D55E00",
"#0072B2"))
#join all sites into one df (and remove diff columns)
ph_all <- merge_recurse(list(alg_no_noise, lol_detided, bml_detide_temp)) %>%
select(-diff3)
#set order for sites
ph_all$site <- factor(ph_all$site, levels=c("Lompoc Landing","Alegria","Bodega Bay"))
#set custom color for sites
pal <- c(
"Alegria" = "#D55E00",
"Lompoc Landing" = "#009E73",
"Bodega Bay" = "#0072B2"
)
ggplot(ph_all, aes(x=date_time, y=p_h, group=site)) +
geom_line(aes(color=site), size=0.7, alpha=0.8) +
scale_color_manual(values = pal) + #color lines by custom site color palette
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
ylab("pH") +
theme_bw() +
theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
axis.title.x=element_text(size=15),
axis.text.y=element_text(size=12),
axis.title.y=element_text(size=15),
legend.position = "none")
ggsave(here("figures", "sensors", "ph_all.png"), height=20, width=40, units="cm")
# For presentations, highlight one line at a time per site
#BML focal
ph_all$site <- factor(ph_all$site, levels=c("Lompoc Landing","Alegria","Bodega Bay"))
ggplot(ph_all, aes(x=date_time, y=p_h, group=site)) +
geom_line(aes(color=site, alpha=site), size=0.7) +
scale_color_manual(values = pal) + #color lines by custom site color palette
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
ylab("pH") +
theme_bw() +
theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
axis.title.x=element_text(size=15),
axis.text.y=element_text(size=12),
axis.title.y=element_text(size=15),
legend.position = "none") +
scale_alpha_manual(values=c(0.5,0.5,1))
ggsave(here("figures", "sensors", "ph_bml.png"), height=20, width=40, units="cm")
#LOL focal
ph_all$site <- factor(ph_all$site, levels=c("Alegria","Bodega Bay", "Lompoc Landing"))
ggplot(ph_all, aes(x=date_time, y=p_h, group=site)) +
geom_line(aes(color=site, alpha=site), size=0.7) +
scale_color_manual(values = pal) + #color lines by custom site color palette
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
ylab("pH") +
theme_bw() +
theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
axis.title.x=element_text(size=15),
axis.text.y=element_text(size=12),
axis.title.y=element_text(size=15),
legend.position = "none") +
scale_alpha_manual(values=c(0.5,0.5,1))
ggsave(here("figures", "sensors", "ph_lol.png"), height=20, width=40, units="cm")
#ALG focal
ph_all$site <- factor(ph_all$site, levels=c("Lompoc Landing","Bodega Bay", "Alegria"))
ggplot(ph_all, aes(x=date_time, y=p_h, group=site)) +
geom_line(aes(color=site, alpha=site), size=0.7) +
scale_color_manual(values = pal) + #color lines by custom site color palette
scale_x_datetime(breaks = scales::date_breaks("1 week"),
labels = date_format("%m/%d %H:%m")) +
xlab("Date time") +
ylab("pH") +
theme_bw() +
theme(axis.text.x=element_text(angle=45, vjust = 1, hjust=1, size=12),
axis.title.x=element_text(size=15),
axis.text.y=element_text(size=12),
axis.title.y=element_text(size=15),
legend.position = "none") +
scale_alpha_manual(values=c(0.5,0.5,1))
ggsave(here("figures", "sensors", "ph_alg.png"), height=20, width=40, units="cm")
# Save the "final" df to a .csv file
write.csv(ph_all, here("data", "sensors", "ph_clean.csv"), row.names = FALSE)
Now, for some “Chan et al 2017” style analyses
ph_7.8 <- ph_all %>%
mutate(threshold = ifelse(p_h<7.8, "below", "above")) %>%
group_by(site, threshold) %>%
summarize(tot_observations = n()) %>%
ungroup() %>%
add_row(site="Alegria", threshold="below", tot_observations=0) #add this row because Alegria has 0 observations below 7.8 (for now...)
## `summarise()` has grouped output by 'site'. You can override using the `.groups` argument.
#Get total number of observations - use double square brackets because it's a tibble
lol_n <- ph_7.8[[1,3]] + ph_7.8[[2,3]]
bml_n <- ph_7.8[[3,3]] + ph_7.8[[4,3]]
alg_n <- ph_7.8[[5,3]] + ph_7.8[[6,3]]
#Get frequency of pH values below 7.8
lol_7.8 <- ph_7.8[[2,3]]/lol_n
bml_7.8 <- ph_7.8[[4,3]]/bml_n
alg_7.8 <- ph_7.8[[6,3]]/alg_n
# Calculate min, max, average pH values for each site
alg_min <- (min(alg_no_noise$p_h))
alg_max <- (max(alg_no_noise$p_h))
alg_median <- (median(alg_no_noise$p_h))
alg_mean <- (mean(alg_no_noise$p_h))
alg_sd <- (sd(alg_no_noise$p_h))
alg_cv <- (alg_sd/alg_mean)*100 #coefficient of variation
lol_min <- (min(lol_detided$p_h))
lol_max <- (max(lol_detided$p_h))
lol_median <- (median(lol_detided$p_h))
lol_mean <- (mean(lol_detided$p_h))
lol_sd <- (sd(lol_detided$p_h))
lol_cv <- (lol_sd/lol_mean)*100 #coefficient of variation
bml_min <- (min(bml_detide_temp$p_h))
bml_max <- (max(bml_detide_temp$p_h))
bml_median <- (median(bml_detide_temp$p_h))
bml_mean <- (mean(bml_detide_temp$p_h))
bml_sd <- (sd(bml_detide_temp$p_h))
bml_cv <- (bml_sd/bml_mean)*100 #coefficient of variation
ph_table <- tribble(
~"Site", ~"Min pH", ~"Max pH", ~"Median pH", ~"CV",
"Bodega Marine Lab", bml_min, bml_max, bml_median, bml_cv,
"Lompoc Landing", lol_min, lol_max, lol_median, lol_cv,
"Alegria", alg_min, alg_max, alg_median, alg_cv)
ph_table %>%
gt() %>%
fmt_number(
columns = c("Min pH":"Median pH"),
rows = everything(),
decimals = 2) %>%
data_color(
columns = c("Site"),
colors = scales::col_factor( # <- bc it's a factor
palette = c("#D55E00","#0072B2","#009E73"),
domain = c("Bodega Marine Lab", "Lompoc Landing", "Alegria"))) %>%
gtsave(here("figures", "sensors", "ph_table.png"))
At BML, sampling interval started at every 10 minutes and then switched to 15 minutes for all sites
Filtering for tide heights based on temp/pH patterns - All points at ALG were removed at tide heights below 0.9 - All points at LOL were removed at tide heights below 2.0 (3.232 was also a good option, but I don’t want to remove more values than need be… ask about this) - All points at BML were removed at tide heights below -0.5
Removed additional anomalous time points (noise or major jumps, especially in wrong direction) - 20 from BML - All data collected before 06/18/2021 at ALG, plus 46 extra points - 9 from LOL
lol_cycle <- lol_all %>%
drop_na() %>% #remove times with tides but without pH values
filter(!(date_time >= ymd_hms("2021-07-31 13:30:00") & date_time <= ymd_hms("2021-07-31 14:00:00")), #remove anomalous points (major jumps in pH)
!(date_time >= ymd_hms("2021-08-01 13:45:00") & date_time <= ymd_hms("2021-08-01 15:00:00")),
!(date_time >= ymd_hms("2021-06-26 16:45:00") & date_time <= ymd_hms("2021-06-26 18:00:00")),
!(date_time == ymd_hms("2021-07-13 08:00:00")),
!(date_time == ymd_hms("2021-08-19 03:30:00")),
!(date_time == ymd_hms("2021-09-15 09:15:00")),
!(date_time == ymd_hms("2021-08-20 00:15:00"))) %>%
mutate(cycle=ifelse(tide>=3.232, "high", "low"))
figure <- function(df_input,name) {
m <- list(
l = 10,
r = 10,
b = 30,
t = 30,
pad = 4
)
fig <- plot_ly(width = 2000)
fig <- fig %>%
add_trace(data = df_input, y=~p_h, x = ~date_time, name = "pH", yaxis = "y1", mode = "lines", type = "scatter", line=list(color="green"))
y2 <- list(
tickfont = list(color = "red"),
titlefont = list(color = "red"),
overlaying = "y",
side = "left",
anchor="free",
position=0.02,
title = "Temperature (\u00B0C)")
fig <- fig %>%
add_trace(data =df_input, y=~temp_c, x = ~date_time, name = "Temperature (\u00B0C)", yaxis = "y2", mode = "lines", type = "scatter", line=list(color="red"))
fig <- fig %>%
layout(
title = name, yaxis2 = y2, #yaxis3 = y3,
xaxis = list(title = 'Date',
domain = c(0.06, 1),
tick0 = "2021-06-14 08:00:00",
dtick = 7*86400000.0,
tickformat="%d-%b"),
yaxis = list(title = "pH",
tickfont = list(color = "green"),
titlefont = list(color = "green"),
side="left",
anchor="free",
position=0.05),
showlegend = FALSE,
margin = m
)
return(fig)
}
#save it
lol_cycle_fig <- figure(lol_cycle,"Lompoc Landing")
orca(lol_cycle_fig, file="lol-cycle-full.png") #save plotly image
filesstrings::move_files("lol-cycle-full.png", here("figures", "sensors"), overwrite=TRUE) #move the image to the "Figures" folder
## 1 file moved. 0 failed.
Alternatively, check out https://plotly.com/r/multiple-axes/
Next steps: 1. assess the greatest pH and temperature range in a 24 hour, 1 week period and monthly averages for LOL
Re-filter all pH data based on HOBO temp data (especially, make annotations next to every single point/range that was filtered out, for future QC)
Talk to someone about overfiltering vs underfiltering LOL data - look at code chunk 595 (3.232 tide) vs 530 (2.0 tide)